dat %>%
ggplot(aes(year, lakeid, fill=is.na(daynum))) +
geom_tile(color="grey") +
theme_bw() +
facet_wrap(~metric, nrow=6)
Remaining data issues:
Predict DOC daynum from other variable daynum’s in each northern lake
no_dups = dat %>% group_by(lakeid, year, metric) %>% summarise(N = n()) %>% filter(N == 1)
## `summarise()` has grouped output by 'lakeid', 'year'. You can override using
## the `.groups` argument.
n_lakes_wide = left_join(no_dups, dat) %>%
select(-sampledate) %>%
pivot_wider(names_from = metric, values_from = daynum) %>%
filter(lakeid %in% c("AL", "BM", "CB", "CR", "SP", "TB", "TR"))
## Joining, by = c("lakeid", "year", "metric")
hold_data = na.omit(data.frame(n_lakes_wide %>% ungroup() %>% select(-year, -N))) # , -doc_predict
all = lm(doc_epiMax~(iceon+straton+stratoff+energy+
stability+anoxia_summer+
secchi_openwater_max)*lakeid, data=hold_data)
i_o = lm(doc_epiMax~1, data=hold_data)
hold_step = step(i_o, scope=formula(all))
## Start: AIC=1761.44
## doc_epiMax ~ 1
##
## Df Sum of Sq RSS AIC
## + lakeid 6 74137 395186 1733.7
## + stratoff 1 15696 453627 1755.6
## <none> 469323 1761.4
## + secchi_openwater_max 1 1334 467989 1762.8
## + iceon 1 565 468758 1763.2
## + straton 1 531 468792 1763.2
## + anoxia_summer 1 343 468980 1763.3
## + stability 1 143 469180 1763.4
## + energy 1 57 469266 1763.4
##
## Step: AIC=1733.72
## doc_epiMax ~ lakeid
##
## Df Sum of Sq RSS AIC
## + stratoff 1 6445 388741 1731.9
## <none> 395186 1733.7
## + energy 1 3404 391782 1733.7
## + anoxia_summer 1 2414 392772 1734.3
## + iceon 1 595 394591 1735.4
## + straton 1 167 395019 1735.6
## + secchi_openwater_max 1 60 395126 1735.7
## + stability 1 25 395161 1735.7
## - lakeid 6 74137 469323 1761.4
##
## Step: AIC=1731.93
## doc_epiMax ~ lakeid + stratoff
##
## Df Sum of Sq RSS AIC
## + energy 1 4173 384568 1731.4
## <none> 388741 1731.9
## + anoxia_summer 1 2654 386087 1732.3
## + iceon 1 413 388328 1733.7
## - stratoff 1 6445 395186 1733.7
## + straton 1 270 388470 1733.8
## + stability 1 113 388628 1733.9
## + secchi_openwater_max 1 26 388715 1733.9
## + stratoff:lakeid 6 11338 377402 1737.1
## - lakeid 6 64886 453627 1755.6
##
## Step: AIC=1731.43
## doc_epiMax ~ lakeid + stratoff + energy
##
## Df Sum of Sq RSS AIC
## <none> 384568 1731.4
## + anoxia_summer 1 2646 381922 1731.8
## - energy 1 4173 388741 1731.9
## + straton 1 1294 383274 1732.7
## + stability 1 382 384186 1733.2
## + iceon 1 133 384435 1733.3
## + secchi_openwater_max 1 98 384470 1733.4
## - stratoff 1 7214 391782 1733.7
## + stratoff:lakeid 6 10616 373952 1737.0
## + energy:lakeid 6 1353 383215 1742.6
## - lakeid 6 68295 452863 1757.2
doc_model = lm(doc_epiMax~(iceon+straton+stratoff+energy+
stability+anoxia_summer+
secchi_openwater_max)*lakeid,
data=n_lakes_wide,
na.action = na.exclude)
summary(doc_model)
##
## Call:
## lm(formula = doc_epiMax ~ (iceon + straton + stratoff + energy +
## stability + anoxia_summer + secchi_openwater_max) * lakeid,
## data = n_lakes_wide, na.action = na.exclude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -146.219 -17.140 3.587 21.164 118.872
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.300e+01 1.720e+02 0.541 0.5893
## iceon 2.715e-01 3.632e-01 0.748 0.4557
## straton 1.737e-01 3.632e-01 0.478 0.6330
## stratoff 4.151e-01 3.326e-01 1.248 0.2137
## energy -2.294e-01 2.246e-01 -1.021 0.3085
## stability -9.092e-02 1.451e-01 -0.627 0.5318
## anoxia_summer -9.641e-02 1.644e-01 -0.586 0.5583
## secchi_openwater_max -7.737e-02 8.262e-02 -0.936 0.3503
## lakeid.L 1.585e+02 4.570e+02 0.347 0.7292
## lakeid.Q 4.298e+02 4.414e+02 0.974 0.3315
## lakeid.C 3.833e+02 4.587e+02 0.836 0.4044
## lakeid^4 -1.147e+01 4.700e+02 -0.024 0.9806
## lakeid^5 -2.263e+02 4.604e+02 -0.491 0.6237
## lakeid^6 2.994e+02 4.418e+02 0.678 0.4987
## iceon:lakeid.L -1.068e+00 9.637e-01 -1.108 0.2692
## iceon:lakeid.Q -5.440e-01 9.478e-01 -0.574 0.5667
## iceon:lakeid.C -6.462e-01 9.570e-01 -0.675 0.5004
## iceon:lakeid^4 2.688e-01 9.953e-01 0.270 0.7874
## iceon:lakeid^5 2.241e-01 9.502e-01 0.236 0.8138
## iceon:lakeid^6 -2.166e-01 9.514e-01 -0.228 0.8202
## straton:lakeid.L -7.965e-01 9.864e-01 -0.807 0.4204
## straton:lakeid.Q -2.387e-02 8.998e-01 -0.027 0.9789
## straton:lakeid.C -4.402e-02 9.864e-01 -0.045 0.9645
## straton:lakeid^4 9.142e-01 1.025e+00 0.892 0.3736
## straton:lakeid^5 1.250e+00 9.865e-01 1.267 0.2069
## straton:lakeid^6 6.786e-01 8.723e-01 0.778 0.4376
## stratoff:lakeid.L 4.187e-01 8.989e-01 0.466 0.6419
## stratoff:lakeid.Q -4.273e-01 7.939e-01 -0.538 0.5911
## stratoff:lakeid.C -2.607e-01 9.036e-01 -0.288 0.7733
## stratoff:lakeid^4 -4.354e-01 9.746e-01 -0.447 0.6556
## stratoff:lakeid^5 6.122e-01 9.083e-01 0.674 0.5012
## stratoff:lakeid^6 -1.638e+00 7.856e-01 -2.085 0.0384 *
## energy:lakeid.L 1.945e-01 6.025e-01 0.323 0.7471
## energy:lakeid.Q -2.106e-01 5.697e-01 -0.370 0.7121
## energy:lakeid.C -5.028e-02 5.939e-01 -0.085 0.9326
## energy:lakeid^4 -1.263e-01 6.430e-01 -0.196 0.8446
## energy:lakeid^5 -4.185e-01 5.853e-01 -0.715 0.4756
## energy:lakeid^6 3.159e-02 5.681e-01 0.056 0.9557
## stability:lakeid.L 1.413e-03 3.542e-01 0.004 0.9968
## stability:lakeid.Q 3.411e-02 3.761e-01 0.091 0.9278
## stability:lakeid.C -1.242e-01 3.879e-01 -0.320 0.7491
## stability:lakeid^4 -3.883e-01 3.487e-01 -1.113 0.2670
## stability:lakeid^5 3.507e-02 4.190e-01 0.084 0.9334
## stability:lakeid^6 5.326e-02 4.122e-01 0.129 0.8973
## anoxia_summer:lakeid.L 1.285e-01 4.654e-01 0.276 0.7828
## anoxia_summer:lakeid.Q -1.687e-02 4.508e-01 -0.037 0.9702
## anoxia_summer:lakeid.C 4.096e-02 4.174e-01 0.098 0.9219
## anoxia_summer:lakeid^4 1.103e-01 4.878e-01 0.226 0.8213
## anoxia_summer:lakeid^5 -3.720e-01 3.631e-01 -1.025 0.3069
## anoxia_summer:lakeid^6 2.768e-01 4.140e-01 0.669 0.5046
## secchi_openwater_max:lakeid.L 5.459e-01 2.371e-01 2.302 0.0225 *
## secchi_openwater_max:lakeid.Q -4.266e-01 2.379e-01 -1.793 0.0746 .
## secchi_openwater_max:lakeid.C -1.680e-01 2.245e-01 -0.748 0.4552
## secchi_openwater_max:lakeid^4 -7.175e-02 1.943e-01 -0.369 0.7123
## secchi_openwater_max:lakeid^5 -1.379e-01 2.111e-01 -0.653 0.5143
## secchi_openwater_max:lakeid^6 2.000e-01 2.030e-01 0.985 0.3258
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 42.56 on 180 degrees of freedom
## (37 observations deleted due to missingness)
## Multiple R-squared: 0.3068, Adjusted R-squared: 0.09503
## F-statistic: 1.449 on 55 and 180 DF, p-value: 0.03679
n_lakes_wide$doc_predict = predict(doc_model, n_lakes_wide)
cor = n_lakes_wide %>%
group_by(lakeid) %>%
summarise(r = paste("r =", round(cor(doc_epiMax, doc_predict, use="complete.obs"), 3)))
n_lakes_wide %>%
ggplot(aes(x=doc_epiMax, y=doc_predict, color=as.character(lakeid))) +
geom_point()+
theme_bw() +
labs(color="lakeid")+
geom_abline(slope=1, intercept = 0) +
facet_wrap(~lakeid) +
geom_text(aes(label=r), data=cor, x=300, y=0) +
labs(x="obs peak DOC (daynum)", y="modeled peak DOC (daynum)")
## Warning: Removed 37 rows containing missing values (`geom_point()`).
# not good but vaguely positive relationship; use this instead of median?
missing_doc = n_lakes_wide %>% filter(is.na(doc_epiMax))
missing_doc$doc_fill = round(predict(doc_model, missing_doc))
doc_fill = missing_doc %>%
select(lakeid, year, doc_fill) %>%
rename(daynum_fill = doc_fill) %>%
mutate(metric = "doc_epiMax") %>%
filter(year < 2000)
all_fill = bind_rows(doc_fill)
dat_filled = full_join(dat, all_fill) %>%
mutate(filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill), TRUE, FALSE)) %>%
mutate(daynum_fill = ifelse(is.na(daynum) & !is.na(daynum_fill), daynum_fill, daynum))
## Joining, by = c("lakeid", "metric", "year")
dat_filled$lakeid = factor(dat_filled$lakeid,
levels = c("AL", "BM", "CB", "CR", "SP", "TB", "TR", "FI", "ME", "MO", "WI"),
ordered = T)
vars = c("iceoff", "straton", "secchi_all", "secchi_openwater_max","secchi_openwater_min", "zoopDensity","zoopDensity_CC",
"doc_epiMax","totpuf_epiMax", "totpuf_epiMin", "totpuf_hypoMax", "totpuf_hypoMin",
"anoxia", "anoxia_summer", "stability", "energy", "stratoff", "iceon")
dat_filled$metric = factor(dat_filled$metric,
levels = rev(vars),
ordered=T)
dat_filled %>%
ggplot(aes(year, metric, fill=filled_flag)) +
geom_tile(color="grey") +
theme_bw() +
facet_wrap(~lakeid, nrow=6)
## Additional trimming/filling
Trim to “good” years, fill FI ice data with Monona, then fill additional missing with median values for each lake/metric
df_yearsWant = dat_filled %>%
filter((lakeid %in% c("FI", "ME", "MO", "WI") & year %in% 1996:2018) |
(!(lakeid %in% c("FI", "ME", "MO", "WI")) & year %in% 1982:2018))
fi_icefill = df_yearsWant %>%
filter(lakeid == "MO" & metric %in% c("iceoff", "iceon")) %>%
mutate(lakeid = "FI") %>%
select(-daynum, -filled_flag, -sampledate) %>%
rename(icefill = daynum_fill)
df_yearsWant = df_yearsWant %>%
full_join(fi_icefill) %>%
mutate(daynum_fill = ifelse(is.na(daynum_fill) & !is.na(icefill),
icefill, daynum),
filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill) & !is.na(icefill),
TRUE, filled_flag))
## Joining, by = c("lakeid", "metric", "year")
df_yearsWant %>%
ggplot(aes(year, metric, fill=filled_flag)) +
geom_tile(color="grey") +
theme_bw() +
facet_wrap(~lakeid, nrow=6)
all_lys = df_yearsWant %>%
select(lakeid, year) %>%
distinct()
metric = unique(df_yearsWant$metric)
all_lys_want = expand_grid(all_lys, metric)
dat_final = left_join(all_lys_want, df_yearsWant)
## Joining, by = c("lakeid", "year", "metric")
medians = dat_final %>%
group_by(lakeid, metric) %>%
summarise(median_daynum = median(daynum_fill, na.rm=T))
## `summarise()` has grouped output by 'lakeid'. You can override using the
## `.groups` argument.
dat_final = dat_final %>%
left_join(medians) %>%
mutate(daynum_fill = ifelse(is.na(daynum_fill), median_daynum, daynum_fill)) %>%
mutate(filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill), TRUE, filled_flag)) %>%
select(-icefill, -median_daynum)
## Joining, by = c("lakeid", "metric")
dat_final %>%
ggplot(aes(year, metric, fill=filled_flag)) +
geom_tile(color="grey") +
theme_bw() +
facet_wrap(~lakeid, nrow=6)
write_csv(dat_final, "Data/analysis_ready/final_combined_dates_filled_v2.csv")